package com.analysis

// Top ten products by sales quantity
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._

/** Spark batch job that reports the ten product descriptions with the largest summed quantity.
  *
  * Reads the Parquet dataset at `output/cleaned_data.parquet`, drops rows whose `Description`
  * is null, sums `Quantity` per `Description`, keeps the ten largest totals, writes them as
  * JSON to `output/analysis_results/top_products_by_sales` (overwriting any existing output)
  * and prints the same rows to stdout.
  */
object TopProductsBySales {

  /** Location of the Parquet input. */
  private val InputPath = "output/cleaned_data.parquet"

  /** Directory that receives the JSON result. */
  private val OutputPath = "output/analysis_results/top_products_by_sales"

  /** Number of rows kept in the ranking. */
  private val TopN = 10

  /** Job entry point.
    *
    * @param args command-line arguments; not used
    */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("Top 10 Products by Sales Quantity")
      .master("local[*]")
      .getOrCreate()

    // try/finally so the session is stopped even if reading, aggregating or writing throws.
    try {
      val cleanedDF = spark.read.parquet(InputPath)

      val resultDF = cleanedDF
        .filter(col("Description").isNotNull)
        .groupBy("Description")
        .agg(sum("Quantity").as("TotalQuantity"))
        // Secondary key makes the top-N cut-off deterministic when totals tie, so the
        // written JSON and the printed table always contain the same rows.
        .orderBy(desc("TotalQuantity"), asc("Description"))
        .limit(TopN)
        // Two actions follow (write and show); caching avoids re-running the whole
        // scan + aggregation for the second one.
        .cache()

      resultDF.write
        .mode("overwrite")
        .json(OutputPath)

      resultDF.show()
    } finally {
      // Stopping the session also releases the cached result.
      spark.stop()
    }
  }
}